1 Foreword

Until this vignette, we only used genes features to try to identify and analyze methylation. In this new one, we propose to use methylation features, which means CGI-related features to extract methylation matrix.

In a first part, we’ll present a per-gene visualisation tool and in the second one, a wide database visualisation trough heatmap pipeline, as in the previous vignettes.

Notably, a table named cgi_pf is used, of dimension 27k * 5, processed by hand from TCGA database and giving coordinates, width and center of each CpG islands (CGI) indexed on epic database.

2 CGI based gene visualisation

Firstly, we defined a plot_gene_cgi, showing CGI and probes in a given window (fixed here at [100k,100k]).

2.1 Code

targeted_genes <- penda_superup_deregulated
features <- get_features(targeted_genes, study = trscr_lusc, up_str = 7500, dwn_str = 7500)
plot_gene_cgi <- function(selected_gene = "OTX1",
                           window = c(100000,100000),
                           meth_platform = meth_lusc$platform,
                           meth_data = meth_lusc$data,
                           cgi_platform = cgi_pf,
                           features_list = features){
  
  
  
  TSS <- features_list[selected_gene,"TSS"]
  
  
  ## get closest cgi + cgi in range
  
  cgi_chr <- cgi_platform[which(as.character(cgi_platform[,"chr"]) == features_list[selected_gene,1]),]
  tmp <- cgi_chr[which(abs(cgi_chr[,"center"]-TSS) < mean(window)+5000),]
  cgi_of_interest<- cbind(tmp,abs(tmp[,"center"]-TSS))
  colnames(cgi_of_interest)<-c(colnames(tmp),"dist to TSS")
  closest_cgi <-cgi_chr[which(abs(cgi_chr[,"center"]-TSS)==min(abs(cgi_chr[,"center"]-TSS),na.rm=T)),]
  
  ## get probes
  
  tmp_probes <- dmprocr::get_probe_names(features_list[selected_gene,],
                                         meth_platform,
                                         "Chromosome",
                                         "Start",
                                         up_str=window[1],
                                         dwn_str=window[2])
  
  sub_epic <- meth_platform[tmp_probes, c(2,3,9)]
  
  ## cols of probes dots
  

  
  cols<-sapply(rownames(sub_epic), function(probe){
    
    if(sub_epic[probe,"Feature_Type"] == "Island")
      {col = "cornflowerblue"}

    
    if(sub_epic[probe,"Feature_Type"] == "S_Shore")
      {col = "chartreuse4"}

    
    if(sub_epic[probe,"Feature_Type"] == "S_Shelf")
      {col="gold"}
    
    
    if(sub_epic[probe,"Feature_Type"] == "N_Shore")
      {col = "chocolate"}

    
    if(sub_epic[probe,"Feature_Type"] == "N_Shelf")
      {col = "darkslategrey"}
      
    if(sub_epic[probe,"Feature_Type"] == ".")
      {col= "black"}
      
    
    return(col)
  })
  
  layout(matrix(1:2,1), respect=TRUE)
  
  plot(sub_epic$Start, rep(1,length(sub_epic$Start)), pch=19, xlim=c(closest_cgi[["center"]]-window[1],closest_cgi[["center"]]+window[2]), cex=1.1, yaxt="n",
       main = paste0("nprobes = ",nrow(sub_epic),", ncgi = ",nrow(cgi_of_interest)),
       xlab="Coordinates (in bp)",
       ylab="",
       ylim=c(0,2),
       col = cols)
  
  legend("topleft",
         c("Island","S_Shore","S_Shelf","N_Shore","N_Shelf","Opensea"),
         xpd = TRUE,
         inset=c(-0.34,0),
         pch=20,
         col=c("cornflowerblue","chartreuse4","gold","chocolate","darkslategrey","black"))

  
  
  text(TSS,0.4,labels = paste0("TSS : ", TSS),cex = 0.7)
  points(TSS, 0.5, pch=9, col="red")
  abline(h=0.5,lty=3)
  
  if(nrow(cgi_of_interest)>0){
    apply(t(t(rownames(cgi_of_interest))),1,function(cgi){
      
      rect(cgi_of_interest[cgi,"start"],1.4,cgi_of_interest[cgi,"end"],1.2,
           col = "red",
           border="black")
      
    })
  }
  
  ## Indexing probes per cgi
  
  tmp_cgi_index <- apply(t(t(rownames(sub_epic))),1, function(probe){
    foo <-apply(t(t(rownames(cgi_of_interest))),1,function(cgi){
      
      if(sub_epic[probe,"Start"] < cgi_of_interest[cgi,"end"] & sub_epic[probe,"Start"] >= cgi_of_interest[cgi,"start"]){
        rownames(cgi_of_interest[cgi,])}
    })
    
    index<-unlist(foo)
    if(!is.null(index)){
      names(index)<- probe
    }
    return(index)
  })
  
  cgi_index <- sort(unlist(tmp_cgi_index))
  
  methvals_of_interest <- meth_data[intersect(rownames(meth_data),names(cgi_index)),]
  methvals_of_interest<- methvals_of_interest[names(cgi_index),]
  
  
  colors=c("cyan", "black", "red")
  cols = colorRampPalette(colors)(100)
  
  breaks <- c(seq(0,0.33,length=35),
              seq(0.34,0.66,length=33),
              seq(0.67,1,length=33))
  
  image(methvals_of_interest,
        axes=FALSE,
        col=cols,
        main=paste0("nprobes indexed : ",nrow(methvals_of_interest)),
        breaks = breaks,
        ylab="Patients",
        xlab="probes")
  
  mtext(paste(noquote(selected_gene),"Methylation profile",sep= " "), outer=TRUE,  cex=2, line=-2)
  
  
} 

2.2 Output

plot_gene_cgi("FKBP4")

plot_gene_cgi("OTX1")

plot_gene_cgi("CDT1")

Note that probes are sorted by coordinates. Thus, the heatmap is readable from left to right. As we can see, there is no “neutral” CGI. We “loose” many probes between left and right plots due to index : we include in the heatmap only probes on CGIs. The three examples shows that there is different genes methylation profile : gene with CGI concentrated around TSS, gene with CGI distributed relatively evenly over the window (CDT1) and genes with very spread CGI (FKBP4).

3 Wide database heatmaps

3.1 Preprocess

To get our pipeline working, we need as usual a 2-level depth list. The following index probes per CGI features such as shore, shelf, opensea etc.

get_indexed_cgi <- function(window = c(100000,100000),
                            features_list = features,
                            cgi_platform = cgi_pf,
                            meth_data = meth_lusc$data,
                            meth_platform = meth_lusc$platform){
  
  
  cgi_indexed_probes<- epimedtools::monitored_apply(t(t(rownames(features_list))),mod = 20,1,function(gene){
    print(noquote(paste0("Indexing probes for gene ",gene," ...")))
    
    chr<-features_list[gene,1]
    TSS <- features_list[gene,"TSS"]
    
    meth_platform_chr<- meth_platform[rownames(meth_platform)[meth_platform[[1]] %in% chr],]
      tmp_probes <- dmprocr::get_probe_names(features_list[gene,], meth_platform_chr, "Chromosome", "Start", window[1], window[2])
    
    ## get probe for gene
    
    sub_platform <- meth_platform_chr[tmp_probes, c("Chromosome","Start","Feature_Type")]
    
    
    
    for (i in which(sub_platform[,3]==".")){
      if(sub_platform[i,"Start"] > TSS){sub_platform[i,"Feature_Type"] <- "upper_opensea"}
      if(sub_platform[i,"Start"] < TSS){sub_platform[i,"Feature_Type"] <- "lower_opensea"}  
      ### encode "." as opensea
    }
    features_type_names <- unique(sub_platform[,"Feature_Type"])
    
    foo<-apply(t(t(features_type_names)),1,function(type){
      
      
      tmp_type<-assign(paste0(type),rownames(sub_platform[which(sub_platform[,"Feature_Type"]==type),]))
      if(!is.list(tmp_type)){assign(paste0(type),list(tmp_type))}
      else{assign(paste0(type),tmp_type)}
      ## create a list of probes for each level of "Feature_Type" + very very very smart trick to deal with 1 length list becoming
      ## character
      

      
    })
    
    for (i in 1:length(foo)){
      if(is.list(foo[[i]])){foo[i]<-foo[[i]]}
    }
      
      
      
    
    names(foo)<- features_type_names
    
    
    
    
    
    ret = list(sub_epic = sub_platform,
               lower_opensea = foo$lower_opensea,
               S_shore = foo$S_Shore,
               S_shelf = foo$S_Shelf,
               Island = foo$Island,
               N_shelf = foo$N_Shelf,
               N_shore = foo$N_Shore,
               upper_opensea = foo$upper_opensea)
    
    return(ret)
    
    
  })
  
  names(cgi_indexed_probes)<- rownames(features_list)
  return(cgi_indexed_probes)
  
}

cgi_indexed_probes <- get_indexed_cgi()

3.2 Heatmaps superup

3.2.1 Tumoral tissues

map_cgi<-reduce_map(cgi_indexed_probes,c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))


means_per_regions_per_genes_per_patient_t <- reduce_rows(meth_tumoral,map_cgi, mean ,na.rm=T)
means_t <- subset_vals_per_bins(data = meth_tumoral,
                             values_per_patient = means_per_regions_per_genes_per_patient_t,
                             fun = mean,
                             binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))

meth_heatmap(means_t, main = "mean of means superup/tumoral")

sd_per_regions_per_genes_per_patient_t <- reduce_rows(meth_tumoral,map_cgi, sd ,na.rm=T)
sds_t <- subset_vals_per_bins(data = meth_tumoral,
                              values_per_patient = sd_per_regions_per_genes_per_patient_t,
                              fun = sd,
                              binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(sds_t, main = "mean of sd superup/tumoral")

rsd_per_regions_per_genes_per_patient_t <- reduce_rows(meth_tumoral,map_cgi, rsd ,na.rm=T)
rsds_t <- subset_vals_per_bins(data = meth_tumoral,
                            values_per_patient = rsd_per_regions_per_genes_per_patient_t,
                            fun = rsd,
                            binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(rsds_t, main = "mean of rsd superup/tumoral")

3.2.2 Healthy tissues

means_per_regions_per_genes_per_patient_h<- reduce_rows(meth_normal,map_cgi, mean ,na.rm=T)
means_h <- subset_vals_per_bins(data = meth_normal,
                              values_per_patient = means_per_regions_per_genes_per_patient_h,
                              fun = mean,
                              binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))

meth_heatmap(means_h, main = "mean of means superup/healthy")

sd_per_regions_per_genes_per_patient_h <- reduce_rows(meth_normal,map_cgi, sd ,na.rm=T)
sds_h <- subset_vals_per_bins(data = meth_normal,
                            values_per_patient = sd_per_regions_per_genes_per_patient_h,
                            fun = sd,
                            binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(sds_h, main = "mean of sd superup/healthy")

rsd_per_regions_per_genes_per_patient_h <- reduce_rows(meth_normal,map_cgi, rsd ,na.rm=T)
rsds_h <- subset_vals_per_bins(data = meth_normal,
                             values_per_patient = rsd_per_regions_per_genes_per_patient_h,
                             fun = rsd,
                             binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(rsds_h, main = "mean of rsd superup/healthy")

boxplot_res(means_h,means_t)

boxplot_res(sds_h,sds_t)

boxplot_res(rsds_h,rsds_t)

3.2.3 Differential values

means_per_regions_per_genes_per_patient_d<- reduce_rows(meth_diff,map_cgi, mean ,na.rm=T)
means_d <- subset_vals_per_bins(data = meth_diff,
                              values_per_patient = means_per_regions_per_genes_per_patient_d,
                              fun = mean,
                              binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))

meth_heatmap(means_d, main = "mean of means superup/differential")

sd_per_regions_per_genes_per_patient_d <- reduce_rows(meth_diff,map_cgi, sd ,na.rm=T)
sds_d <- subset_vals_per_bins(data = meth_diff,
                            values_per_patient = sd_per_regions_per_genes_per_patient_d,
                            fun = sd,
                            binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(sds_d, main = "mean of sd superup/differential")

rsd_per_regions_per_genes_per_patient_d <- reduce_rows(meth_diff,map_cgi, rsd ,na.rm=T)
rsds_d <- subset_vals_per_bins(data = meth_diff,
                             values_per_patient = rsd_per_regions_per_genes_per_patient_d,
                             fun = rsd,
                             binlist=c("lower_opensea","S_shore","S_shelf","Island","N_shelf","N_shore","upper_opensea"))
meth_heatmap(rsds_d, main = "mean of rsd superup/differential")

rsds_d

3.3 Heatmaps superdown

3.3.1 Tumoral tissues

3.3.2 Healthy tissues

3.3.3 Boxplots

3.3.4 Differential values

3.4 Heatmap superconserved

3.4.1 Tumoral tissues

3.4.2 Healthy tissues

3.4.3 Boxplots

3.4.4 Differential values